In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from scipy.io import loadmat
from scipy.special import logsumexp
%matplotlib inline
In [2]:
data = loadmat('Tut8_file1.mat')
# Unpack the experiment variables explicitly instead of locals().update(data):
# that call has undefined behavior outside module scope and hides which names
# enter the namespace.  The downstream cells use exactly these three arrays.
state = data['state']
response = data['response']
reward = data['reward']

T, p = state.shape  # T trials; p columns per record (loadmat column vectors)
alpha = .1  # learning rate for the delta rule
beta = 1.   # softmax inverse temperature

# Lookup table mapping a (state, response) pair -- each coded 1 or 2 -- onto
# a row 0..3 of the value matrix V.  Row/column 0 is unused padding.
index = np.zeros((3, 3), dtype=int)
index[1, 1] = 0
index[2, 1] = 1
index[1, 2] = 2
index[2, 2] = 3
In [3]:
def compute_V(alpha, states=None, responses=None, rewards=None, idx=None):
    """Learn one value per (state, response) pair with a delta rule.

    On each trial the value of the pair actually taken is nudged toward the
    obtained reward:  V <- V + alpha * (reward - V).  All other entries are
    carried forward unchanged.

    Parameters
    ----------
    alpha : float
        Learning rate.
    states, responses : array-like, optional
        Per-trial codes, indexable so that states[t] is a length-1 integer
        array (the shape loadmat column vectors have).  Default to the
        module-level ``state`` / ``response``.
    rewards : array-like, optional
        Per-trial rewards; defaults to the module-level ``reward``.
    idx : ndarray, optional
        (3, 3) table mapping a (state, response) pair to a row 0..3 of V;
        defaults to the module-level ``index``.

    Returns
    -------
    ndarray of shape (4, n_trials + 1)
        Column t holds the value estimates before trial t; column 0 is zeros.
    """
    if states is None:
        states = state
    if responses is None:
        responses = response
    if rewards is None:
        rewards = reward
    if idx is None:
        idx = index
    n_trials = len(states)

    V = np.zeros((4, n_trials + 1))
    for t in range(n_trials):
        # Carry every value forward, then update only the pair that was taken.
        # (The original also computed the trial-0 index before the loop; that
        # assignment was dead code and has been removed.)
        V[:, t + 1] = V[:, t]
        i = idx[states[t], responses[t]][0]
        V[i, t + 1] += alpha * (rewards[t] - V[i, t])
    return V
In [4]:
V = compute_V(.1)
# One trace per (state, response) pair, rows of V in index order.
# NOTE(review): index maps row 1 to (state=2, response=1) but the legend's
# second entry is 'V_12' -- confirm whether the notation is V_{state,response}
# (in which case entries 2 and 3 should be swapped) or V_{response,state}.
fig, ax = plt.subplots()
ax.plot(V.T)
ax.legend(['V_11', 'V_12', 'V_21', 'V_22'])
ax.set_xlabel('trial')
ax.set_ylabel('learned value')
Out[4]:
It's clear that something changed at about the 70th time step. What happened is that the scientist changed the rule for the second "state": from that point on, responding with the same lever became wrong and the opposite lever correct. The first state shows no such change.
In [5]:
# Rewards (top row) and trial indices (bottom row) for trials where the
# response matched the state and the state was 2.
matched_state2 = np.logical_and(state == response, state == 2)
np.vstack((reward[matched_state2], np.arange(T)[matched_state2.flatten()]))
Out[5]:
In [ ]:
# Rewards on trials where the response matched the state and the state was 1.
matched_state1 = (state == response) & (state == 1)
reward[matched_state1]
Out[ ]:
In [ ]:
alphas = np.arange(0., 1., .01)
betas = np.arange(0., 3., .01)
LL = np.zeros((len(alphas), len(betas)))

# The row of V for the chosen response and the rows for both available
# responses depend only on the data, not on (alpha, beta) -- hoist them out
# of the grid search instead of recomputing them on every grid point.
trials = np.arange(T)
chosen = np.array([index[state[t], response[t]][0] for t in range(T)])   # (T,)
avail = np.array([index[state[t], 1:][0] for t in range(T)])             # (T, 2)

for j, alpha in enumerate(alphas):
    V = compute_V(alpha)
    v_chosen = V[chosen, trials]           # value of the taken action, per trial
    v_avail = V[avail, trials[:, None]]    # values of both actions, per trial
    for k, beta in enumerate(betas):
        # Softmax log-likelihood of the observed choices, summed over trials.
        # logsumexp keeps the normalization term numerically stable, unlike
        # the original log(sum(exp(...))).
        LL[j, k] = np.sum(beta * v_chosen - logsumexp(beta * v_avail, axis=1))
In [ ]:
X, Y = np.meshgrid(betas, alphas)
fig = plt.figure()
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed in
# 3.6; add_subplot is the supported way to create a 3-D axes.
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, LL, cmap=cm.coolwarm,
                linewidth=0, antialiased=False)
ax.set_xlabel('beta')
ax.set_ylabel('alpha')
ax.set_zlabel('log-likelihood')

# Set the figure size BEFORE creating the figure -- the original set it
# afterwards, so it never applied to this heatmap.
plt.rcParams['figure.figsize'] = [7.0, 10.0]
plt.figure()
plt.imshow(LL[::-1], extent=(0, 3, 0, 1))
plt.colorbar(orientation='horizontal')
plt.xlabel('beta')
plt.ylabel('alpha')
In [ ]:
# Locate the grid point with the highest log-likelihood and report the
# corresponding (alpha, beta) parameter values.
best = np.unravel_index(np.argmax(LL), LL.shape)
print('The alpha that maximizes the loglikelihood is:', alphas[best[0]])
print('The beta that maximizes the loglikelihood is:', betas[best[1]])
In [ ]: